# Table 1, SM1, SM2 
# descriptives 221012

library(network)
library(sna)
library(statnet)
library(dplyr)
library(RSiena)

#Set Directory
setwd('/Users/drschaef/Google Drive/Projects/Adriana/Raw data/Longitudinal Data/')
setwd('/Users/drschaef/My Drive/Projects/Adriana/Raw data/Longitudinal Data/')

load("~/Google Drive/Projects/Adriana/Papers/Homophily/allDatPreSIENA.RData")

### M_ data (N=1795 in raw files)
dat <- data.frame(readRDS('attrNoRaceM_220523.rds'),readRDS('attrRaceM_220829_noAMENA.rds')[,-1])
mat1M <-  readRDS("M_W1.network.rds")
mat2M <-  readRDS("M_W2.network.rds")
mat3M <-  readRDS("M_W3.network.rds")
#eca1 <- readRDS("ecaM1_220608.rds")
#eca2 <- readRDS("ecaM2_220608.rds")
skin <- readRDS("skinM_.rds")
# append skin tone measures to dat
rawM <- merge(x=dat, y=skin, all.x=T, all.y=F, by.x='MID', by.y='MFID')

dat <- data.frame(readRDS('attrNoRaceS_220523.rds'),readRDS('attrRaceS_220829_noAMENA.rds')[,-1])
mat1S <-  readRDS("S_W1.network.rds")
mat2S <-  readRDS("S_W2.network.rds")
mat3S <-  readRDS("S_W3.network.rds")
#eca1 <- readRDS("ecaS1_220608.rds")
#eca2 <- readRDS("ecaS2_220608.rds")
skin <- readRDS("skinS_.rds")
# append skin tone measures to dat
rawS <- merge(x=dat, y=skin, all.x=T, all.y=F, by.x='MID', by.y='MFID')

# identify the analytic sample
idsM <- unique(c(attM12$MID,attM23$MID))
(nM <- length(idsM))  # unique students in M_ sample
idsS <- unique(c(attS12$MID,attS23$MID))
(nS <- length(idsS))  # unique students in S_ sample
nM+nS  # total sample size

attM <- rawM[rawM$MID %in% idsM,]
attS <- rawS[rawS$MID %in% idsS,]
attM1 <- rawM[rawM$MID %in% idsM & rawM$W1PIDNA==0,]
attS1 <- rawS[rawS$MID %in% idsS & rawS$W1PIDNA==0,]

dat <- rawM
keep <- (dat$W1PIDNA==0) & !is.na(dat$chooseOne1) & dat$Gradecohort<12
outDeg <- rowSums(mat1M[keep, keep])
c(summary(outDeg), SD=sd(outDeg, na.rm=T))
keep <- (dat$W2PIDNA==0) & !is.na(dat$chooseOne2) & dat$Gradecohort<12
outDeg <- rowSums(mat2M[keep, keep])
c(summary(outDeg), SD=sd(outDeg, na.rm=T))
keep <- (dat$W3PIDNA==0) & !is.na(dat$chooseOne3) & dat$Gradecohort<12
outDeg <- rowSums(mat3M[keep, keep])
c(summary(outDeg), SD=sd(outDeg, na.rm=T))

dat <- rawS
keep <- (dat$W1PIDNA==0) & !is.na(dat$chooseOne1) & dat$Gradecohort<12
outDeg <- rowSums(mat1S[keep, keep])
c(summary(outDeg), SD=sd(outDeg, na.rm=T))
keep <- (dat$W2PIDNA==0) & !is.na(dat$chooseOne2) & dat$Gradecohort<12
outDeg <- rowSums(mat2S[keep, keep])
c(summary(outDeg), SD=sd(outDeg, na.rm=T))
keep <- (dat$W3PIDNA==0) & !is.na(dat$chooseOne3) & dat$Gradecohort<12
outDeg <- rowSums(mat3S[keep, keep])
c(summary(outDeg), SD=sd(outDeg, na.rm=T))


### racial descriptives
dat <- attM
# - proportion with any heritage
round(colSums(dat[,which(colnames(dat)=='A'):which(colnames(dat)=='MULTI')],na.rm=T) / 
	colSums(!is.na(dat[,which(colnames(dat)=='A'):which(colnames(dat)=='MULTI')]),na.rm=T), 3)
mean(dat$mono, na.rm=T)
mean(dat$multi, na.rm=T)
# 1=A, 2=B, 3=L, 4=W, 5=N, 6=O, 9=M
round(table(dat$monoRace) / sum(table(dat$monoRace)), 3) 
# number of categories
round(table(rowSums(dat[,which(colnames(dat)=='A'):which(colnames(dat)=='O')],na.rm=T) ) /
	sum(table(rowSums(dat[,which(colnames(dat)=='A'):which(colnames(dat)=='O')],na.rm=T) )), 3)
#table(rowSums(dat[,which(colnames(dat)=='A'):which(colnames(dat)=='O')],na.rm=T),  dat$monoRace)
# 1=A, 2=B, 3=L, 4=W, 5=N, 6=O, 9=M, 10=Fluid
round(table(dat$chooseOne, useNA='always') / sum(table(dat$chooseOne, useNA='always')), 3)
aggregate(dat$PERLA, list(dat$chooseOne), function(x) mean(x, na.rm=T))

dat <- attS
# - proportion with any heritage
round(colSums(dat[,which(colnames(dat)=='A'):which(colnames(dat)=='MULTI')],na.rm=T) / 
	colSums(!is.na(dat[,which(colnames(dat)=='A'):which(colnames(dat)=='MULTI')]),na.rm=T), 3)
mean(dat$mono, na.rm=T)
mean(dat$multi, na.rm=T)
# 1=A, 2=B, 3=L, 4=W, 5=N, 6=O, 9=M
round(table(dat$monoRace) / sum(table(dat$monoRace)), 3) 
# number of categories
round(table(rowSums(dat[,which(colnames(dat)=='A'):which(colnames(dat)=='O')],na.rm=T) ) /
	sum(table(rowSums(dat[,which(colnames(dat)=='A'):which(colnames(dat)=='O')],na.rm=T) )), 3)
#table(rowSums(dat[,which(colnames(dat)=='A'):which(colnames(dat)=='O')],na.rm=T),  dat$monoRace)
# 1=A, 2=B, 3=L, 4=W, 5=N, 6=O, 9=M, 10=Fluid
round(table(dat$chooseOne, useNA='always') / sum(table(dat$chooseOne, useNA='always')), 3)
aggregate(dat$PERLA, list(dat$chooseOne), function(x) mean(x, na.rm=T))

### how many cases dropped b/c of missing current identification on race?
dat <- rawM
kept12 <- (dat$W1PIDNA==0 | dat$W2PIDNA==0) & !is.na(dat$chooseOne1) & !is.na(dat$chooseOne2) & dat$Gradecohort<12
kept12_ignoreRace <- (dat$W1PIDNA==0 & dat$W2PIDNA==0) & dat$Gradecohort<12
dropped12 <- kept12_ignoreRace - kept12
sum(kept12_ignoreRace)-sum(kept12)
kept23 <- (dat$W2PIDNA==0 | dat$W3PIDNA==0) & !is.na(dat$chooseOne2) & !is.na(dat$chooseOne3) & dat$Gradecohort<12
kept23_ignoreRace <- (dat$W2PIDNA==0 & dat$W3PIDNA==0) & dat$Gradecohort<12
dropped23 <- kept23_ignoreRace - kept23
sum(table(dropped12,dropped23)[2:3]) / nM   # 3.2%

dat <- rawS
kept12 <- (dat$W1PIDNA==0 | dat$W2PIDNA==0) & !is.na(dat$chooseOne1) & !is.na(dat$chooseOne2) & dat$Gradecohort<12
kept12_ignoreRace <- (dat$W1PIDNA==0 & dat$W2PIDNA==0) & dat$Gradecohort<12
dropped12 <- kept12_ignoreRace - kept12
sum(kept12_ignoreRace)-sum(kept12)
kept23 <- (dat$W2PIDNA==0 | dat$W3PIDNA==0) & !is.na(dat$chooseOne2) & !is.na(dat$chooseOne3) & dat$Gradecohort<12
kept23_ignoreRace <- (dat$W2PIDNA==0 & dat$W3PIDNA==0) & dat$Gradecohort<12
dropped23 <- kept23_ignoreRace - kept23
sum(table(dropped12,dropped23)[4:5]) / nS  # 4.6%

# calculate biracial distribution of ancestry
dat <- attM   # not rawM, which is full sample
dat$raceFirst <- NA
dat$raceSecond <- NA
dat$nRace <- NA
for (i in 1:dim(dat)[1]) {
	temp <- dat[i,which(colnames(dat)=='A'):which(colnames(dat)=='O')]
	dat$raceFirst[i] <- which(temp==1)[2]
	dat$raceSecond[i] <- which(temp==1)[1]
	dat$nRace[i] <- sum(temp==1)
	}
round(table(dat$nRace) / sum(table(dat$nRace)),3)
datMoBi <- dat[dat$nRace<=2,]	
datMoBi$raceFirst[is.na(datMoBi$raceFirst)] <- datMoBi$raceSecond[is.na(datMoBi$raceFirst)]
(counts <- table(datMoBi$raceFirst, datMoBi$raceSecond))
round(t( counts / (sum(counts)-sum(diag(counts))) ), 3)

dat <- attS   # not rawS, which is full sample
dat$raceFirst <- NA
dat$raceSecond <- NA
dat$nRace <- NA
for (i in 1:dim(dat)[1]) {
	temp <- dat[i,which(colnames(dat)=='A'):which(colnames(dat)=='O')]
	dat$raceFirst[i] <- which(temp==1)[2]
	dat$raceSecond[i] <- which(temp==1)[1]
	dat$nRace[i] <- sum(temp==1)
	}
round(table(dat$nRace) / sum(table(dat$nRace)),3)
datMoBi <- dat[dat$nRace<=2,]	
datMoBi$raceFirst[is.na(datMoBi$raceFirst)] <- datMoBi$raceSecond[is.na(datMoBi$raceFirst)]
(counts <- table(datMoBi$raceFirst, datMoBi$raceSecond))
round(t( counts / (sum(counts)-sum(diag(counts))) ), 3)

# means and SDs
dat <- attM
dim(dat)
table(dat$Gradecohort, useNA='always')
round(table(dat$Gradecohort) / sum(table(dat$Gradecohort)), 3)
c(summary(dat$Female), SD=sd(dat$Female, na.rm=T))
c(summary(dat$pared8), SD=sd(dat$pared8, na.rm=T))
c(summary(dat$ImmigGen), SD=sd(dat$ImmigGen, na.rm=T))
dat1 <- attM1  # excludes t1 8th graders
dim(dat1)
c(summary(dat1$acad9_W1), SD=sd(dat1$acad9_W1, na.rm=T))
c(summary(dat$acad9_W2), SD=sd(dat$acad9_W2, na.rm=T))
c(summary(dat1$eriExp1noImp), SD=sd(dat1$eriExp1noImp, na.rm=T))
c(summary(dat$eriExp2noImp), SD=sd(dat$eriExp2noImp, na.rm=T))
c(summary(dat1$eriCen1noImp), SD=sd(dat1$eriCen1noImp, na.rm=T))
c(summary(dat$eriCen2noImp), SD=sd(dat$eriCen2noImp, na.rm=T))
c(summary(dat1$eriRes1noImp), SD=sd(dat1$eriRes1noImp, na.rm=T))
c(summary(dat$eriRes2noImp), SD=sd(dat$eriRes2noImp, na.rm=T))
c(summary(dat1$disc1noImp), SD=sd(dat1$disc1noImp, na.rm=T))
c(summary(dat$disc2noImp), SD=sd(dat$disc2noImp, na.rm=T))
c(summary(dat$PERLA), SD=sd(dat$PERLA, na.rm=T))
cor.test(dat$PERLA, dat$multi)
loM <- loess(multi ~ PERLA, dat)
predM <- predict(loM, data.frame(PERLA = seq(1, 11, 1)), se = TRUE)
plot(as.numeric(names(predM$fit)),predM$fit, ylim=c(0,.4), type='l',xlab='PERLA',ylab='Proportion Multiracial', lty=2)


dat <- attS
dim(dat)
table(dat$Gradecohort, useNA='always')
round(table(dat$Gradecohort) / sum(table(dat$Gradecohort)), 3)
c(summary(dat$Female), SD=sd(dat$Female, na.rm=T))
c(summary(dat$pared8), SD=sd(dat$pared8, na.rm=T))
c(summary(dat$ImmigGen), SD=sd(dat$ImmigGen, na.rm=T))
dat1 <- attS1  # excludes t1 8th graders
dim(dat1)
c(summary(dat1$acad9_W1), SD=sd(dat1$acad9_W1, na.rm=T))
c(summary(dat$acad9_W2), SD=sd(dat$acad9_W2, na.rm=T))
c(summary(dat1$eriExp1noImp), SD=sd(dat1$eriExp1noImp, na.rm=T))
c(summary(dat$eriExp2noImp), SD=sd(dat$eriExp2noImp, na.rm=T))
c(summary(dat1$eriCen1noImp), SD=sd(dat1$eriCen1noImp, na.rm=T))
c(summary(dat$eriCen2noImp), SD=sd(dat$eriCen2noImp, na.rm=T))
c(summary(dat1$eriRes1noImp), SD=sd(dat1$eriRes1noImp, na.rm=T))
c(summary(dat$eriRes2noImp), SD=sd(dat$eriRes2noImp, na.rm=T))
c(summary(dat1$disc1noImp), SD=sd(dat1$disc1noImp, na.rm=T))
c(summary(dat$disc2noImp), SD=sd(dat$disc2noImp, na.rm=T))
c(summary(dat$PERLA), SD=sd(dat$PERLA, na.rm=T))
cor.test(dat$PERLA, dat$multi)
loS <- loess(multi ~ PERLA, dat)
predS <- predict(loS, data.frame(PERLA = seq(1, 11, 1)), se = TRUE)
plot(as.numeric(names(predS$fit)),predA$fit, ylim=c(0,.6), type='l',xlab='PERLA',ylab='Proportion Multiracial', lty=2)
lines(as.numeric(names(predM$fit)),predM$fit, col='blue', lwd=3)

# calculate odds of same self-classification
calcOR <- function(MAT, ATT) {
	tempNet <- network(MAT)
	set.vertex.attribute(tempNet, 'race', ATT)
	tempERGM <- ergm(tempNet ~ edges + nodematch('race'))
	or <- exp(coef(tempERGM)[2])
	return(or)
	}
calcOR(matM1_, attM12$chooseOne1)
calcOR(matM_2, attM12$chooseOne2)
calcOR(matM2_, attM23$chooseOne2)
calcOR(matM_3, attM23$chooseOne3)
calcOR(matS1_, attS12$chooseOne1)
calcOR(matS_2, attS12$chooseOne2)
calcOR(matS2_, attS23$chooseOne2)
calcOR(matS_3, attS23$chooseOne3)

calcOR2 <- function(MAT, ATT1, ATT2) {
	tempNet <- network(MAT)
	momo <- outer(ATT1$mono==1, ATT1$mono==1, "*")
	sameClass <- outer(ATT2, ATT2, "==")
	
	rDums <- as.matrix(ATT1[,which(names(ATT1)=='A'): which(names(ATT1)=='N')], ncol=5)
	nInCommon <- rDums %*% t(rDums)
	anyAncestry <- nInCommon
	anyAncestry[anyAncestry>=1] <- 1
	anyAncestry_momo <- anyAncestry*momo
	excludeClass <- nInCommon-sameClass	
	excludeClass[excludeClass==-1] <- 0
	excludeClass[excludeClass>=1] <- 1
	excludeClass[momo] <- 0
#	tempERGM <- ergm(tempNet ~ edges + edgecov(excludeClass) + edgecov(momo) )
#	tempERGM <- ergm(tempNet ~ edges + edgecov(anyAncestry) )
	tempERGM <- ergm(tempNet ~ edges + edgecov(anyAncestry) + edgecov(anyAncestry_momo) )
	or <- exp(coef(tempERGM)[2])
	return( c(coef(tempERGM)[2], diag(tempERGM$covar)[2]**.5, or) )
#	return(tempERGM)
	}
calcOR2(matM1_, attM12, attM12$chooseOne1)
calcOR2(matM_2, attM12, attM12$chooseOne2)
calcOR2(matM2_, attM23, attM23$chooseOne2)
calcOR2(matM_3, attM23, attM23$chooseOne3)
calcOR2(matS1_, attS12, attS12$chooseOne1)
calcOR2(matS_2, attS12, attS12$chooseOne2)
calcOR2(matS2_, attS23, attS23$chooseOne2)
calcOR2(matS_3, attS23, attS23$chooseOne3)



